package extractorCode;


import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Command-line scraper that walks the numbered listing pages under
 * {@code http://www.juancole.com/page/}, pulls the HTML found inside every
 * {@code <div class="entry-body">} element, reduces it to ASCII-only plain
 * text and stores each entry in its own sequentially numbered {@code .txt}
 * file.
 *
 * <p>Note that the class name and the output directory say "atlasshrugs"
 * while the URL that is actually fetched is juancole.com.
 */
public class atlasshrugs {

	/** Initial capacity, in chars, of the buffer a downloaded page is accumulated in. */
	private final static int INITIAL_BUF_SIZE = 65536;

	/** Prefix of every listing page URL; the page number is appended to it. */
	private static final String BASE_URL = "http://www.juancole.com/page/";

	/** Number of the first listing page that is fetched. */
	private static final int FIRST_PAGE = 1;

	/** Number of the last listing page that is fetched (inclusive). */
	private static final int LAST_PAGE = 446;

	/** Prefix of every output file name; the entry counter and ".txt" are appended to it. */
	private static final String OUTPUT_PREFIX = "C:\\new data\\atlasshrugs\\";

	/** Whitespace sitting between a closing '>' and the next opening '<'. */
	private static final Pattern INTER_TAG_WHITESPACE = Pattern.compile(">\\s*<");

	/** Tab, backspace (U+0008, not a regex word boundary), carriage return and line feed. */
	private static final Pattern CONTROL_CHARS = Pattern.compile("[\t\b\r\n]");

	/** Captures the markup between the entry-body div and the entry-footer div that follows it. */
	private static final Pattern ENTRY_BODY = Pattern.compile(
			"<div class=\"entry-body\">(.*?)</div><div class=\"entry-footer\">",
			Pattern.CASE_INSENSITIVE);

	/** Any single markup tag. */
	private static final Pattern TAG = Pattern.compile("\\<.*?>");

	/** Any character outside the ASCII range. */
	private static final Pattern NON_ASCII = Pattern.compile("[^\\p{ASCII}]");

	/**
	 * Fetches every listing page in turn and writes each entry found on it to
	 * its own file. Files are numbered from 0 in the order entries are found,
	 * across all pages.
	 *
	 * @param args ignored
	 * @throws Exception if an output file cannot be created or written
	 */
	public static void main(String args[]) throws Exception
	{
		int count = 0;

		for (int i = FIRST_PAGE; i <= LAST_PAGE; i++) {
			System.out.println("page" + i);
			String page = flatten(fetchPage(BASE_URL + i));

			Matcher newsItem = ENTRY_BODY.matcher(page);
			while (newsItem.find()) {
				writeEntry(toPlainText(newsItem.group(1)), count++);
			}
		}
	}

	/**
	 * Collapses a page into a single line so that {@link #ENTRY_BODY} can match
	 * across what were originally several lines ('.' does not match line
	 * terminators unless DOTALL is set).
	 *
	 * @param page raw page markup
	 * @return the markup with inter-tag whitespace and control characters removed
	 */
	private static String flatten(String page) {
		// Order matters: inter-tag whitespace is collapsed before the control
		// characters are dropped, exactly as the sequential replacements did.
		String collapsed = INTER_TAG_WHITESPACE.matcher(page).replaceAll("><");
		// One pass over a character class replaces the former per-character
		// passes; a separate "\r\n" pass could never match after this.
		return CONTROL_CHARS.matcher(collapsed).replaceAll("");
	}

	/**
	 * Strips markup tags and every non-ASCII character from an entry.
	 *
	 * @param html markup captured from inside an entry-body div
	 * @return the remaining ASCII text
	 */
	private static String toPlainText(String html) {
		String withoutTags = TAG.matcher(html).replaceAll("");
		return NON_ASCII.matcher(withoutTags).replaceAll("");
	}

	/**
	 * Writes one entry to {@code OUTPUT_PREFIX + index + ".txt"}.
	 *
	 * @param text  entry text to store
	 * @param index sequence number used as the file name
	 * @throws IOException if the file cannot be created or written
	 */
	private static void writeEntry(String text, int index) throws IOException {
		// try-with-resources closes the writer even when write() fails.
		try (BufferedWriter wr = new BufferedWriter(new FileWriter(OUTPUT_PREFIX + index + ".txt"))) {
			wr.write(text);
		}
	}

	/**
	 * Downloads the resource at the given URL and returns its bytes as a
	 * string, one char per byte.
	 *
	 * <p>Failures are best-effort: a malformed URL or an I/O error is reported
	 * on standard error and whatever was read up to that point (possibly the
	 * empty string) is returned, so that the caller can carry on with the next
	 * page.
	 *
	 * @param urlString absolute URL of the page to download
	 * @return the downloaded content, or the part of it read before a failure
	 */
	private static String fetchPage(String urlString){
		StringBuilder buffer = new StringBuilder(INITIAL_BUF_SIZE);
		// try-with-resources releases the connection's stream on every path;
		// previously it was never closed.
		try (BufferedInputStream reader = new BufferedInputStream(new URL(urlString).openStream())) {
			int temp;
			while ((temp = reader.read()) != -1) {
				// Each byte is widened to a char as-is. Bytes above 0x7F therefore
				// become non-ASCII chars, which toPlainText() discards later.
				buffer.append((char) temp);
			}
		} catch (IOException e) {
			// MalformedURLException is an IOException, so it is reported here too.
			System.err.println("Failed to fetch " + urlString + " after reading "
					+ buffer.length() + " chars; continuing");
			e.printStackTrace();
		}
		return buffer.toString();
	}

}
